function [net, currSample_video, video_masked, error_recover]=process_epoch_STGConvNet_inpainting(para, net, currSample_video, video_masked)
% PROCESS_EPOCH_STGCONVNET_INPAINTING  Run one learning epoch with inpainting.
%
%   [net, currSample_video, video_masked, error_recover] = ...
%       process_epoch_STGConvNet_inpainting(para, net, currSample_video, video_masked)
%
%   Steps performed, in order:
%     1. Compute per-layer statistics of the current (partially filled)
%        observed sequence video_masked.
%     2. Run Langevin sampling starting from video_masked and copy the
%        result into the entries where para.masks == 1 ("filling").
%     3. Run Langevin sampling starting from currSample_video.
%     4. Report the mean absolute difference between para.imageSet and the
%        filled video_masked over the entries where para.masks == 1.
%     5. Compute per-layer statistics of the updated currSample_video.
%     6. Update every layer's filters and bias with the difference between
%        the observed and the synthesized statistics.
%
%   Inputs (fields listed are the ones read in this file):
%     para  - struct with fields FC, numVideos, numChain, L, stepsize,
%             masks and imageSet.
%     net   - struct whose net.layers{l} has fields filters, bias, pad,
%             stride, numFilter and lambdaLearningRate.
%     currSample_video - current synthesized sample(s).
%     video_masked     - observed sequence with previously filled entries.
%
%   Outputs:
%     net              - network with updated filters and biases.
%     currSample_video - sample(s) after Langevin sampling.
%     video_masked     - observed sequence after filling.
%     error_recover    - mean absolute recovery error (gathered to host).
%
%   NOTE(review): requires mex_conv3d, vl_nnrelu, gpuArray support and
%   sampling_sequence_by_Langevin to be available on the path.

    numLayers = numel(net.layers);
    isFC = (para.FC == true);

    % Entries selected by the mask; computed once and reused below.
    maskedIdx = (para.masks == 1);

    %% compute statistics of the observed sequence
    % The statistics are taken on video_masked as it was passed in, i.e.
    % before the filling step of this epoch.
    [obs_stat_weights, obs_stat_bias] = compute_layer_statistics( ...
        net, video_masked, para.numVideos, isFC, 'observed');

    %% filling
    image_temp = sampling_sequence_by_Langevin(net, para.L, para.stepsize, video_masked, para.FC);
    video_masked(maskedIdx) = image_temp(maskedIdx);

    %% sampling by Langevin dynamics
    currSample_video = sampling_sequence_by_Langevin(net, para.L, para.stepsize, currSample_video, para.FC);

    %% evaluate the error
    error_recover = gather(mean(abs(para.imageSet(maskedIdx) - video_masked(maskedIdx))));
    disp(['The average error of recovery is '  num2str(error_recover)]);

    %% compute statistics of the synthesized sequence
    [syn_stat_weights, syn_stat_bias] = compute_layer_statistics( ...
        net, currSample_video, para.numChain, isFC, 'synthesized');

    for l = 1:numLayers

        %% compute the gradient for weights and bias
        gradient_weight = obs_stat_weights{l} - syn_stat_weights{l};
        gradient_bias = obs_stat_bias{l} - syn_stat_bias{l};

        disp(['Layer ' num2str(l) ': SSD_weight: ' num2str(mean(abs(gradient_weight(:))))]);

        %% update the weights and bias
        for iFilter = 1:net.layers{l}.numFilter
            % Per-filter step size: the base rate divided by the observed
            % bias statistic of that filter; eps avoids division by zero.
            adaptivelambdaLearningRate = net.layers{l}.lambdaLearningRate / (obs_stat_bias{l}(iFilter) + eps);
            net.layers{l}.filters(:,:,:,:,iFilter) = net.layers{l}.filters(:,:,:,:,iFilter) + adaptivelambdaLearningRate * gradient_weight(:,:,:,:,iFilter);
            net.layers{l}.bias(iFilter) = net.layers{l}.bias(iFilter) + adaptivelambdaLearningRate * gradient_bias(iFilter);
        end

    end


function [stat_weights, stat_bias] = compute_layer_statistics(net, input, normalizer, isFC, label)
% COMPUTE_LAYER_STATISTICS  Per-layer filter/bias statistics of a sequence.
%
%   Local helper shared by the observed and the synthesized pass. For each
%   layer it runs mex_conv3d followed by vl_nnrelu, builds the indicator
%   that is handed back to mex_conv3d, and stores the second and third
%   outputs of that call divided by normalizer and gathered to host memory.
%   (Presumably those outputs are the derivatives with respect to the
%   filters and the bias - confirm against mex_conv3d.)
%
%   stat_weights{l} and stat_bias{l} belong to net.layers{l}. label is only
%   used in the progress message ('observed' or 'synthesized').
%
%   Only the activations of the current layer are kept alive, so earlier
%   response maps can be released as soon as they are no longer needed.
%
%   This local function deliberately has no terminating 'end', matching
%   the main function of this file.

    numLayers = numel(net.layers);
    stat_weights = cell(1, numLayers);
    stat_bias = cell(1, numLayers);

    layerInput = input;  % the first layer of response map is the input signal

    for l = 1:numLayers

        % Use a timer handle so that tic/toc calls made inside callees
        % cannot disturb the measurement.
        layerTimer = tic;
        layer = net.layers{l};

        layerOutput = mex_conv3d(layerInput, layer.filters, layer.bias, 'pad', layer.pad, 'stride', layer.stride);
        layerOutput = vl_nnrelu(layerOutput);

        if l == numLayers && isFC
            % Last layer in FC mode: a scalar one replaces the ReLU mask.
            indicator = gpuArray(ones(1, 'single'));
        else
            % Calling vl_nnrelu with a second argument of ones yields the
            % mask of positions that passed the ReLU.
            indicator = vl_nnrelu(layerOutput, gpuArray(ones(size(layerOutput), 'single')));
        end

        [~, layerWeights, layerBias] = mex_conv3d(layerInput, layer.filters, layer.bias, indicator, 'pad', layer.pad, 'stride', layer.stride);

        stat_weights{l} = gather(layerWeights * (1/normalizer));
        stat_bias{l} = gather(layerBias * (1/normalizer));
        disp(['compute the ' label ' statistics in layer '  num2str(l) ' : ' num2str(toc(layerTimer))]);

        % The rectified output feeds the next layer; the previous input is
        % released here.
        layerInput = layerOutput;

    end
    